View Javadoc

1   /*
2   jMimeMagic(TM) is a Java library for determining the content type of files or
3   streams.
4   
5   Copyright (C) 2004 David Castro
6   
7   This library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Lesser General Public
9   License as published by the Free Software Foundation; either
10  version 2.1 of the License, or (at your option) any later version.
11  
12  This library is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  Lesser General Public License for more details.
16  
17  You should have received a copy of the GNU Lesser General Public
18  License along with this library; if not, write to the Free Software
19  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  
21  For more information, please email arimus@users.sourceforge.net
22  */
23  package net.sf.jmimemagic;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  
28  import java.io.File;
29  import java.io.IOException;
30  import java.io.PrintStream;
31  
32  import java.util.ArrayList;
33  import java.util.Collection;
34  import java.util.HashMap;
35  import java.util.Iterator;
36  
37  
38  /***
39   * This class is the primary class for jMimeMagic
40   *
41   * @author $Author: arimus $
42   * @version $Revision: 1.8 $
43   */
44  public class Magic
45  {
46      private static Log log = LogFactory.getLog(Magic.class);
47      private static boolean initialized = false;
48      private static MagicParser magicParser = null;
49      private static HashMap hintMap = new HashMap();
50  
51      /***
52       * constructor
53       */
54      public Magic()
55      {
56          log.debug("instantiated");
57      }
58  
59      /***
60       * Add a hint to use the specified matcher for the given extension
61       * 
62       * @param extension DOCUMENT ME!
63       * @param matcher DOCUMENT ME!
64       */
65      private static void addHint(String extension, MagicMatcher matcher)
66      {
67          if (hintMap.keySet().contains(extension)) {
68              ArrayList a = (ArrayList) hintMap.get(extension);
69              a.add(matcher);
70          } else {
71              ArrayList a = new ArrayList();
72              a.add(matcher);
73              hintMap.put(extension, a);
74          }
75      }
76  
77      /***
78       * create a parser and initialize it
79       *
80       * @throws MagicParseException DOCUMENT ME!
81       */
82      public static synchronized void initialize()
83          throws MagicParseException
84      {
85          log.debug("initialize()");
86  
87          if (!initialized) {
88              log.debug("initializing");
89              magicParser = new MagicParser();
90              magicParser.initialize();
91  
92              // build hint map
93              Iterator i = magicParser.getMatchers().iterator();
94  
95              while (i.hasNext()) {
96                  MagicMatcher matcher = (MagicMatcher) i.next();
97                  String ext = matcher.getMatch().getExtension();
98  
99                  if ((ext != null) && !ext.trim().equals("")) {
100                     if (log.isDebugEnabled()) {
101                         log.debug("adding hint mapping for extension '" + ext + "'");
102                     }
103 
104                     addHint(ext, matcher);
105                 } else if (matcher.getMatch().getType().equals("detector")) {
106                     String[] exts = matcher.getDetectorExtensions();
107 
108                     for (int j = 0; j < exts.length; j++) {
109                         if (log.isDebugEnabled()) {
110                             log.debug("adding hint mapping for extension '" + exts[j] + "'");
111                         }
112 
113                         addHint(exts[j], matcher);
114                     }
115                 }
116             }
117 
118             initialized = true;
119         }
120     }
121 
122     /***
123      * return the parsed MagicMatch objects that were created from the magic.xml
124      * definitions
125      *
126      * @return the parsed MagicMatch objects
127      *
128      * @throws MagicParseException DOCUMENT ME!
129      */
130     public static Collection getMatchers()
131         throws MagicParseException
132     {
133         log.debug("getMatchers()");
134 
135         if (!initialized) {
136             initialize();
137         }
138 
139         Iterator i = magicParser.getMatchers().iterator();
140         ArrayList m = new ArrayList();
141 
142         while (i.hasNext()) {
143             MagicMatcher matcher = (MagicMatcher) i.next();
144 
145             try {
146                 m.add(matcher.clone());
147             } catch (CloneNotSupportedException e) {
148                 log.error("failed to clone matchers");
149                 throw new MagicParseException("failed to clone matchers");
150             }
151         }
152 
153         return m;
154     }
155 
156     /***
157      * get a match from a stream of data
158      *
159      * @param data DOCUMENT ME!
160      *
161      * @return DOCUMENT ME!
162      *
163      * @throws MagicParseException DOCUMENT ME!
164      * @throws MagicMatchNotFoundException DOCUMENT ME!
165      * @throws MagicException DOCUMENT ME!
166      */
167     public static MagicMatch getMagicMatch(byte[] data)
168         throws MagicParseException, MagicMatchNotFoundException, MagicException
169     {
170         return getMagicMatch(data, false);
171     }
172 
173     /***
174      * get a match from a stream of data
175      *
176      * @param data DOCUMENT ME!
177      * @param onlyMimeMatch DOCUMENT ME!
178      *
179      * @return DOCUMENT ME!
180      *
181      * @throws MagicParseException DOCUMENT ME!
182      * @throws MagicMatchNotFoundException DOCUMENT ME!
183      * @throws MagicException DOCUMENT ME!
184      */
185     public static MagicMatch getMagicMatch(byte[] data, boolean onlyMimeMatch)
186         throws MagicParseException, MagicMatchNotFoundException, MagicException
187     {
188         log.debug("getMagicMatch(byte[])");
189 
190         if (!initialized) {
191             initialize();
192         }
193 
194         Collection matchers = magicParser.getMatchers();
195         log.debug("getMagicMatch(byte[]): have " + matchers.size() + " matchers");
196 
197         MagicMatcher matcher = null;
198         MagicMatch match = null;
199         Iterator i = matchers.iterator();
200 
201         while (i.hasNext()) {
202             matcher = (MagicMatcher) i.next();
203 
204             log.debug("getMagicMatch(byte[]): trying to match: " +
205                 matcher.getMatch().getMimeType());
206 
207             try {
208                 if ((match = matcher.test(data, onlyMimeMatch)) != null) {
209                     log.debug("getMagicMatch(byte[]): matched " + matcher.getMatch().getMimeType());
210 
211                     return match;
212                 }
213             } catch (IOException e) {
214                 log.error("getMagicMatch(byte[]): " + e);
215                 throw new MagicException(e);
216             } catch (UnsupportedTypeException e) {
217                 log.error("getMagicMatch(byte[]): " + e);
218                 throw new MagicException(e);
219             }
220         }
221 
222         throw new MagicMatchNotFoundException();
223     }
224 
225     /***
226      * get a match from a file
227      *
228      * @param file the file to match content in
229      * @param extensionHints whether or not to use extension to optimize order of content tests
230      *
231      * @return the MagicMatch object representing a match in the file
232      *
233      * @throws MagicParseException DOCUMENT ME!
234      * @throws MagicMatchNotFoundException DOCUMENT ME!
235      * @throws MagicException DOCUMENT ME!
236      */
237     public static MagicMatch getMagicMatch(File file, boolean extensionHints)
238         throws MagicParseException, MagicMatchNotFoundException, MagicException
239     {
240         return getMagicMatch(file, extensionHints, false);
241     }
242 
243     /***
244      * get a match from a file
245      *
246      * @param file the file to match content in
247      * @param extensionHints whether or not to use extension to optimize order of content tests
248      * @param onlyMimeMatch only try to get mime type, no submatches are processed when true
249      *
250      * @return the MagicMatch object representing a match in the file
251      *
252      * @throws MagicParseException DOCUMENT ME!
253      * @throws MagicMatchNotFoundException DOCUMENT ME!
254      * @throws MagicException DOCUMENT ME!
255      */
256     public static MagicMatch getMagicMatch(File file, boolean extensionHints, boolean onlyMimeMatch)
257         throws MagicParseException, MagicMatchNotFoundException, MagicException
258     {
259         log.debug("getMagicMatch(File)");
260 
261         if (!initialized) {
262             initialize();
263         }
264 
265         long start = System.currentTimeMillis();
266 
267         MagicMatcher matcher = null;
268         MagicMatch match = null;
269 
270         // check for extension hints
271         ArrayList checked = new ArrayList();
272 
273         if (extensionHints) {
274             log.debug("trying to use hints first");
275 
276             String name = file.getName();
277             int pos = name.lastIndexOf('.');
278 
279             if (pos > -1) {
280                 String ext = name.substring(pos + 1, name.length());
281 
282                 if ((ext != null) && !ext.equals("")) {
283                     if (log.isDebugEnabled()) {
284                         log.debug("using extension '" + ext + "' for hinting");
285                     }
286 
287                     Collection c = (Collection) hintMap.get(ext);
288 
289                     if (c != null) {
290                         Iterator i = c.iterator();
291 
292                         while (i.hasNext()) {
293                             matcher = (MagicMatcher) i.next();
294 
295                             log.debug("getMagicMatch(File): trying to match: " +
296                                 matcher.getMatch().getDescription());
297 
298                             try {
299                                 if ((match = matcher.test(file, onlyMimeMatch)) != null) {
300                                     log.debug("getMagicMatch(File): matched " +
301                                         matcher.getMatch().getDescription());
302 
303                                     if (log.isDebugEnabled()) {
304                                         long end = System.currentTimeMillis();
305                                         log.debug("found match in '" + (end - start) +
306                                             "' milliseconds");
307                                     }
308 
309                                     return match;
310                                 }
311                             } catch (UnsupportedTypeException e) {
312                                 log.error("getMagicMatch(File): " + e);
313                                 throw new MagicException(e);
314                             } catch (IOException e) {
315                                 log.error("getMagicMatch(File): " + e);
316                                 throw new MagicException(e);
317                             }
318 
319                             // add to the already checked list
320                             checked.add(matcher);
321                         }
322                     }
323                 } else {
324                     log.debug("no file extension, ignoring hints");
325                 }
326             } else {
327                 log.debug("no file extension, ignoring hints");
328             }
329         }
330 
331         Collection matchers = magicParser.getMatchers();
332         log.debug("getMagicMatch(File): have " + matchers.size() + " matches");
333 
334         Iterator i = matchers.iterator();
335 
336         while (i.hasNext()) {
337             matcher = (MagicMatcher) i.next();
338 
339             if (!checked.contains(matcher)) {
340                 log.debug("getMagicMatch(File): trying to match: " +
341                     matcher.getMatch().getDescription());
342 
343                 try {
344                     if ((match = matcher.test(file, onlyMimeMatch)) != null) {
345                         log.debug("getMagicMatch(File): matched " +
346                             matcher.getMatch().getDescription());
347 
348                         if (log.isDebugEnabled()) {
349                             long end = System.currentTimeMillis();
350                             log.debug("found match in '" + (end - start) + "' milliseconds");
351                         }
352 
353                         return match;
354                     }
355                 } catch (UnsupportedTypeException e) {
356                     log.error("getMagicMatch(File): " + e);
357                     throw new MagicException(e);
358                 } catch (IOException e) {
359                     log.error("getMagicMatch(File): " + e);
360                     throw new MagicException(e);
361                 }
362             } else {
363                 log.debug("getMagicMatch(File): already checked, skipping: " +
364                     matcher.getMatch().getDescription());
365             }
366         }
367 
368         throw new MagicMatchNotFoundException();
369     }
370 
371     /***
372      * print the contents of a magic file
373      *
374      * @param stream DOCUMENT ME!
375      *
376      * @throws MagicParseException DOCUMENT ME!
377      */
378     public static void printMagicFile(PrintStream stream)
379         throws MagicParseException
380     {
381         if (!initialized) {
382             initialize();
383         }
384 
385         Collection matchers = Magic.getMatchers();
386         log.debug("have " + matchers.size() + " matches");
387 
388         MagicMatcher matcher = null;
389         Iterator i = matchers.iterator();
390 
391         while (i.hasNext()) {
392             matcher = (MagicMatcher) i.next();
393             log.debug("printing");
394             printMagicMatcher(stream, matcher, "");
395         }
396     }
397 
398     /***
399      * print a magic match
400      *
401      * @param stream DOCUMENT ME!
402      * @param matcher DOCUMENT ME!
403      * @param spacing DOCUMENT ME!
404      */
405     private static void printMagicMatcher(PrintStream stream, MagicMatcher matcher, String spacing)
406     {
407         stream.println(spacing + "name: " + matcher.getMatch().getDescription());
408         stream.println(spacing + "children: ");
409 
410         Collection matchers = matcher.getSubMatchers();
411         Iterator i = matchers.iterator();
412 
413         while (i.hasNext()) {
414             printMagicMatcher(stream, (MagicMatcher) i.next(), spacing + "  ");
415         }
416     }
417 
418     /***
419      * print a magic match
420      *
421      * @param stream DOCUMENT ME!
422      * @param match DOCUMENT ME!
423      * @param spacing DOCUMENT ME!
424      */
425     public static void printMagicMatch(PrintStream stream, MagicMatch match, String spacing)
426     {
427         stream.println(spacing + "=============================");
428         stream.println(spacing + "mime type: " + match.getMimeType());
429         stream.println(spacing + "description: " + match.getDescription());
430         stream.println(spacing + "extension: " + match.getExtension());
431         stream.println(spacing + "test: " + new String(match.getTest().array()));
432         stream.println(spacing + "bitmask: " + match.getBitmask());
433         stream.println(spacing + "offset: " + match.getOffset());
434         stream.println(spacing + "length: " + match.getLength());
435         stream.println(spacing + "type: " + match.getType());
436         stream.println(spacing + "comparator: " + match.getComparator());
437         stream.println(spacing + "=============================");
438 
439         Collection submatches = match.getSubMatches();
440         Iterator i = submatches.iterator();
441 
442         while (i.hasNext()) {
443             printMagicMatch(stream, (MagicMatch) i.next(), spacing + "    ");
444         }
445     }
446 
447     /***
448      * DOCUMENT ME!
449      *
450      * @param args DOCUMENT ME!
451      */
452     public static void main(String[] args)
453     {
454         //		Magic magic = new Magic();
455         try {
456             //Magic.initialize();
457             File f = new File(args[0]);
458 
459             if (f.exists()) {
460                 MagicMatch match = Magic.getMagicMatch(f, true, false);
461 
462                 System.out.println("filename: " + args[0]);
463                 printMagicMatch(System.out, match, "");
464 
465                 //				Collection submatches = match.getSubMatches();
466                 //				if (match == null) {
467                 //					System.out.println(args[0]+": unknown");
468                 //				} else {
469                 //					System.out.println("=============================");
470                 //					System.out.println("filename: "+args[0]);
471                 //					System.out.println("mime type: "+match.getMimeType());
472                 //					System.out.println("description: "+match.getDescription());
473                 //					System.out.println("extension: "+match.getExtension());
474                 //					System.out.println("test: "+new String(match.getTest().array()));
475                 //					System.out.println("bitmask: "+match.getBitmask());
476                 //					System.out.println("offset: "+match.getOffset());
477                 //					System.out.println("length: "+match.getLength());
478                 //					System.out.println("type: "+match.getType());
479                 //					System.out.println("comparator: "+match.getComparator());
480                 //					System.out.println("=============================");
481                 //
482                 //					Iterator i = submatches.iterator();
483                 //					while (i.hasNext()) {
484                 //						System.out.println("== SUBMATCH =================");
485                 //						MagicMatch m = (MagicMatch)i.next();
486                 //						System.out.println(m.print());
487                 //						System.out.println("=============================");
488                 //					}
489                 //				}
490 
491                 //				FileInputStream fis = new FileInputStream(f);
492                 //				ByteBuffer buffer = ByteBuffer.allocate((int)f.length());
493                 //				byte []buf = new byte[2048];
494                 //				int size = 0;
495                 //				while ((size = fis.read(buf, 0, 2048)) > 0) {
496                 //					buffer.put(buf, 0, size);
497                 //				}
498                 //				byte []tmp = buffer.array();
499                 //				match = parser.getMagicMatch(tmp);
500                 //				if (match == null) {
501                 //					System.out.println(args[0]+": unknown");
502                 //				} else {
503                 //					System.out.println(args[0]+": "+match.getDescription());
504                 //					System.out.println(match.getMimeType());
505                 //				}
506             } else {
507                 System.err.println("file '" + f.getCanonicalPath() + "' not found");
508             }
509         } catch (MagicMatchNotFoundException e) {
510             System.out.println("no match found");
511         } catch (Exception e) {
512             System.err.println("error: " + e);
513             e.printStackTrace(System.err);
514         }
515     }
516 }